%matplotlib inline
path_scripts = '/mnt/kauffman/joosts/projects/STRT_epidermis/scripts'
import sys
sys.path.append(path_scripts)
from EPI_misc_scripts_v1_1 import *
from EPI_affinity_propagation_v1_0 import *
from EPI_neg_binom_regression_v1_1 import *
from EPI_pseudotemporal_ordering_v1_0 import *
from matplotlib.gridspec import GridSpec
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import ward
from ipyparallel import Client
c = Client(profile='default')
dview = c[:]
dview.execute('import sys')
dview.execute('sys.path.append("/mnt/kauffman/joosts/projects/STRT_epidermis/scripts")')
dview.execute('from EPI_misc_scripts_v1_1 import *')
dview.execute('from EPI_affinity_propagation_v1_0 import *')
dview.execute('from EPI_neg_binom_regression_v1_1 import *')
dview.execute('from EPI_pseudotemporal_ordering_v1_0 import *')
exp_id = '201509151726'
path_input = '/mnt/kauffman/joosts/projects/STRT_epidermis/data_input/v1.8'
path_output = '/mnt/kauffman/joosts/projects/STRT_epidermis/data_output/v1.8'
path_figures = '/mnt/kauffman/joosts/projects/STRT_epidermis/figures/v1.8'
seq = loadData_v1(path_input, exp_id, 'seq', 'DataFrame')
meta = loadData_v1(path_input, exp_id, 'meta', 'DataFrame')
seq_sel_1st = loadData_v1(path_output, exp_id, 'seq_sel_1st', 'DataFrame')
s_groups_1st = loadData_v1(path_output, exp_id, 's_groups_1st', 'Series')
g_groups_1st = loadData_v1(path_output, exp_id, 'g_groups_1st', 'Series')
cmap_1st = {2:'#00CC00',
1:'#FFE000',
8:'#FF9900',
0:'#FF3300',
10:'#CC0000',
6:'#cab2d6',
3:'#A68BC2',
4:'#65429A',
5:'#000066',
7:'#0066FF',
9:'#33CCCC',
11:'#FF99CC',
12:'#660033'}
#Create colormap and colorlist for hair cycle stage data
cmap_hair_cycle = {'mid telogen (Ki67)': '#00CC00', 'late telogen (Ki67)': '#FFCC00'}
#Create colormap and colorlist for Sca1 data
cmap_Sca1 = {'1': '#FF0000', '0': '#0066FF'}
#Create namemap for sample groups:
nmap_1st = {2:'IFE basal cells (IFE B)',
1:'IFE differentiated cells I (IFE DI)',
8:'IFE differentiated cells II (IFE DII)',
0:'IFE keratinized layer I (IFE KI)',
10:'IFE keratinized layer II (IFE KII)',
6:'Upper hair follicle I (uHF I)',
3:'Upper hair follicle II (uHF II)',
4:'Upper hair follicle III (uHF III)',
5:'Sebaceous gland (SG)',
7:'Outer bulge (OB)',
9:'Inner bulge (IB)',
11:'T cells (TC)',
12:'Langerhans cells (LH)'}
nmap_1st_short = {2:'IFE B',
1:'IFE DI',
8:'IFE DII',
0:'IFE KI',
10:'IFE KII',
6:'uHF I',
3:'uHF II',
4:'uHF III',
5:'SG',
7:'OB',
9:'IB',
11:'TC',
12:'LH'}
gmap_1st = {2:'Krt14(hi), Mt2(hi)',
1:'Krt10(dim), Ptgs1(dim)',
8:'Krt10(hi), Ptgs1(hi)',
0:'Lor(dim), Flg2(dim)',
10:'Lor(hi), Flg2(hi)',
6:'Krt79(low), Krt17(low)',
3:'Krt79(dim), Krt17(dim)',
4:'Krt79(hi), Krt17(hi)',
5:'Mgst1(hi), Scd1(hi)',
7:'Postn(hi), Cd34(hi)',
9:'Krt6a(hi), Krt75(hi)',
11:'Cd3(hi), Thy1(hi)',
12:'Cd207(hi), Ctss(hi)'}
seq_sel_1st = dropNull(seq, path_input, cutoff_mean = 0.05)
len(seq_sel_1st.index)
seq_sel_1st = cellCutoff(seq_sel_1st, 2000)
len(seq_sel_1st.columns)
seq_sel_1st, corr_filt_1st, log2_cv_diff_1st, xopt_1st = select_features_v1(seq_sel_1st, 0.05, 5, 2500, path_input, percentile=95,
return_all=True, drop_spikes = False, drop_repeats = False)
selected = seq_sel_1st.index
data_mean = seq.ix[log2_cv_diff_1st.index, s_groups_1st.index].mean(axis = 1)
data_cv = seq.ix[log2_cv_diff_1st.index, s_groups_1st.index].std(axis = 1) / data_mean
log2_mean = np.log2(data_mean)
log2_cv = np.log2(data_cv)
line_x = np.arange(log2_mean.min(), log2_mean.max(), 0.01)
line_y = np.log2(( 2 ** line_x) ** xopt_1st[0] + xopt_1st[1])
clist = pd.Series('silver', index = log2_mean.index)
clist[log2_cv_diff_1st[log2_cv_diff_1st > 0].index] = 'silver'
if np.all(selected) != None:
clist[selected] = 'green'
fig = plt.figure(figsize = [10,10], facecolor = 'w')
ax0 = plt.axes()
ax0.set_xlabel('Average number of transcripts [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
ax0.xaxis.set_label_coords(x = 0.5, y = -0.075)
for tick in ax0.xaxis.get_major_ticks():
tick.label.set_fontsize(35)
tick.label.set_family('Liberation Sans')
ax0.tick_params(axis='x', which='major', pad=10)
ax0.set_ylabel('Coefficient of variation (CV) [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
ax0.yaxis.set_label_coords(y = 0.5, x = -0.075)
for tick in ax0.yaxis.get_major_ticks():
tick.label.set_fontsize(35)
tick.label.set_family('Liberation Sans')
ax0.tick_params(axis='y', which='major', pad=10)
ax0.set_xlim(log2_mean.min() - 0.5, log2_mean.max() + 0.5)
ax0.set_ylim(-1, 5.5)
ax0.scatter(log2_mean, log2_cv, c = clist, linewidth = 0, s= 50, edgecolor = clist)
ax0.plot(line_x, line_y, c = 'r', linewidth = 5)
ax0.text(-3, 5.15, '$\log_{2}(CV) = log_{2}(mean^{-0.52}+0.63)$', family = 'Liberation Sans', fontsize = 30, va = 'center')
ax0.plot([-4, -3.5], [5.15, 5.15], c = 'r', linewidth = 5)
figname = 'v1.8_S2_A_Feature_selection.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
seq_s_corr = log2Transform(seq).ix[g_groups_1st.index, s_groups_1st.index].corr()
cmap = plt.cm.jet
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (10.625, 10))
gs = plt.GridSpec(nrows = 2, ncols = 3, wspace=0.02, hspace=0.02, width_ratios=[0.5 , 11.25, 0.5], height_ratios=[11.25, 0.5])
#plot heatmap
axHM = plt.subplot(gs[0, 1])
axHM.matshow(seq_s_corr, cmap = cmap, aspect = 'auto', vmin = 0.2, vmax = 0.8, interpolation = None)
axHM.set_xlabel('Cells (#0 - %s)' % len(s_groups_1st), family = 'Liberation Sans', fontsize = 40)
axHM.set_ylabel('Cells (#0 - %s)' % len(s_groups_1st), family = 'Liberation Sans', fontsize = 40)
axHM.xaxis.set_label_coords(0.5, -0.075)
axHM.yaxis.set_label_coords(-0.075, 0.5)
axHM.set_yticks([])
clean_axis(axHM)
#plot left sample groups
axSG1 = plt.subplot(gs[0, 0])
axSG1.set_ylim(len(s_groups_1st), 0)
for pos, gr in enumerate(s_groups_1st):
axSG1.axhspan(pos, pos+1, color=cmap_1st[gr])
remove_ticks(axSG1)
clean_axis(axSG1)
#plot bottom sample groups
axSG2 = plt.subplot(gs[1, 1])
axSG2.set_xlim(0, len(s_groups_1st))
for pos, gr in enumerate(s_groups_1st):
axSG2.axvspan(pos, pos+1, color=cmap_1st[gr])
remove_ticks(axSG2)
clean_axis(axSG2)
#plot label
axLabel = plt.subplot(gs[0,2])
axLabel.matshow(np.matrix(np.arange(0.8, 0.2, -0.001)).T,
cmap = cmap, aspect = 'auto', vmin = 0.2, vmax = 0.8)
axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')
clean_axis(axLabel)
axLabel.set_yticks(np.linspace(axLabel.get_ylim()[0], axLabel.get_ylim()[1], 7))
axLabel.set_yticklabels(['0.2','0.3','0.4','0.5','0.6','0.7','0.8'], family = 'Liberation Sans', fontsize = 35, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)
axLabel.set_ylabel('Pearson correlation', family = 'Liberation Sans', fontsize = 40)
axLabel.yaxis.set_label_coords(5.5, 0.5)
figname = 'v1.8_S2_B_Cell_clustering.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
seq_g_corr = log2Transform(seq).ix[g_groups_1st.index,s_groups_1st.index].T.corr()
cmap = plt.cm.jet
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (10.625, 10))
gs = plt.GridSpec(nrows = 2, ncols = 3, wspace=0.02, hspace=0.02, width_ratios=[0.5 , 11.25, 0.5], height_ratios=[11.25, 0.5])
#plot heatmap
axHM = plt.subplot(gs[0, 1])
axHM.matshow(seq_g_corr, cmap = cmap, aspect = 'auto', vmin = 0.0, vmax = 0.5, interpolation = None)
axHM.set_xlabel('Genes (#0 - %s)' % len(g_groups_1st.index), family = 'Liberation Sans', fontsize = 40)
axHM.set_ylabel('Genes (#0 - %s)' % len(g_groups_1st.index), family = 'Liberation Sans', fontsize = 40)
axHM.xaxis.set_label_coords(0.5, -0.025)
axHM.yaxis.set_label_coords(-0.025, 0.5)
axHM.set_yticks([])
clean_axis(axHM)
#plot label
axLabel = plt.subplot(gs[0,2])
axLabel.matshow(np.matrix(np.arange(0.5, 0.0, -0.001)).T,
cmap = cmap, aspect = 'auto', vmin = 0.0, vmax = 0.5)
axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')
clean_axis(axLabel)
axLabel.set_yticks(np.linspace(axLabel.get_ylim()[0], axLabel.get_ylim()[1], 6))
axLabel.set_yticklabels(['0.0','0.1','0.2','0.3','0.4','0.5'], family = 'Liberation Sans', fontsize = 35, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)
axLabel.set_ylabel('Pearson correlation', family = 'Liberation Sans', fontsize = 40)
axLabel.yaxis.set_label_coords(5.5, 0.5)
figname = 'v1.8_S2_C_Gene_clustering.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#Transfer the expression data to relative values (0 [min] - 1 [max])
seq_sel = log2Transform(seq).ix[g_groups_1st.index, s_groups_1st.index]
seq_sel_= seq_sel.apply(lambda x: x / max(x), axis = 1)
cmap = plt.cm.jet
#create figure
bar_size = 0.75 #size of group and metadata bars
bar_pad = 0.1 #pad size of group and metadata bars
width = 29.0
height = 40.0
#height = ((width - bar_size) / len(s_groups)) * len(g_groups) + 5 * bar_size
plt.figure(figsize=(width, height), facecolor = 'w')
#generate main axes
axHM = plt.axes()
axHM.set_position([0.00, 0.00, 1.0, 1.0])
#plot heatmap
axHM.matshow(seq_sel_,
cmap = cmap,
aspect = 'auto',
interpolation = 'nearest',
vmin = 0,
vmax = 1)
remove_ticks(axHM, linewidth = 1)
clean_axis(axHM)
#create axes divider
divider = make_axes_locatable(axHM)
#draw cell/sample groups
axSG = divider.append_axes("bottom", size= bar_size, pad=bar_pad)
axSG.set_xlim(0, len(s_groups_1st))
for pos, gr in enumerate(s_groups_1st):
axSG.axvspan(pos, pos+1, color=cmap_1st[gr])
remove_ticks(axSG, linewidth = 1)
clean_axis(axSG)
#draw Sca1 metadata
axSCA = divider.append_axes("top", size= bar_size, pad=bar_pad)
axSCA.set_xlim(0, len(s_groups_1st))
for pos, ix in enumerate(s_groups_1st.index):
axSCA.axvspan(pos, pos+1, color=cmap_Sca1[meta.ix['green_flag', ix]])
remove_ticks(axSCA, linewidth = 1)
clean_axis(axSCA)
#draw hair cycle metadata
axHC = divider.append_axes("top", size= bar_size, pad=bar_pad)
axHC.set_xlim(0, len(s_groups_1st))
for pos, ix in enumerate(s_groups_1st.index):
axHC.axvspan(pos, pos+1, color=cmap_hair_cycle[meta.ix['hair_cycle_stage', ix]])
remove_ticks(axHC, linewidth = 1)
clean_axis(axHC)
#draw cell size metadata
axSIZE = divider.append_axes("top", size= bar_size, pad=bar_pad)
axSIZE.set_xlim(0, len(s_groups_1st))
cmapSIZE = plt.cm.Greens
SIZE_max = meta.ix['diameter(um)', s_groups_1st.index].astype(float).max()
for pos, ix in enumerate(s_groups_1st.index):
val = float(meta.ix['diameter(um)', ix])
axSIZE.axvspan(pos, pos+1, color=cmapSIZE(val/SIZE_max))
remove_ticks(axSIZE, linewidth = 1)
clean_axis(axSIZE)
#draw transcript metadata
axTRANS = divider.append_axes("top", size= bar_size, pad=bar_pad)
axTRANS.set_xlim(0, len(s_groups_1st))
cmapTRANS = plt.cm.Reds
TRANS_max = meta.ix['sum_transcripts', s_groups_1st.index].astype(float).max()
for pos, ix in enumerate(s_groups_1st.index):
val = float(meta.ix['sum_transcripts', ix])
axTRANS.axvspan(pos, pos + 1, color = cmapTRANS(val / TRANS_max))
remove_ticks(axTRANS, linewidth = 1)
clean_axis(axTRANS)
figname = 'v1.8_S2_D_Heatmap.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
TRANS_max = meta.ix['sum_transcripts', s_groups_1st.index].astype(float).max()
cmapTRANS = plt.cm.Reds
#generate figure
plt.figure(facecolor = 'w', figsize = (8, 1))
axLabel = plt.subplot()
axLabel.set_xlim(2000, 38000)
for pos in np.arange(2000, 38000, 1):
axLabel.axvspan(pos, pos+1, color = cmapTRANS(pos / TRANS_max))
axLabel.yaxis.set_ticks([])
axLabel.xaxis.set_ticks_position('top')
clean_axis(axLabel)
axLabel.set_xticks([2000, 10000, 20000, 30000, 38000])
axLabel.set_xticklabels([2000, 10000, 20000, 30000, 38000], family = 'Liberation Sans',
fontsize = 30, ha = 'center', rotation = 'vertical')
axLabel.tick_params(axis='x', which='major', pad=10)
figname = 'v1.8_S2_D_Legend_Transcripts.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
SIZE_max = meta.ix['diameter(um)', s_groups_1st.index].astype(float).max()
cmapSIZE = plt.cm.Greens
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (8, 1))
axLabel = plt.subplot()
axLabel.set_xlim(5, 25)
for pos in np.arange(5, 25, 0.1):
axLabel.axvspan(pos, pos+1, color = cmapSIZE(pos / SIZE_max))
axLabel.yaxis.set_ticks([])
axLabel.xaxis.set_ticks_position('top')
clean_axis(axLabel)
axLabel.set_xticks([5, 7.5, 10, 12.5, 15, 17.5, 20, 22.5, 25])
axLabel.set_xticklabels([5.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, 22.5, 25.0], family = 'Liberation Sans',
fontsize = 30, ha = 'center', rotation = 'vertical')
axLabel.tick_params(axis='x', which='major', pad=10)
figname = 'v1.8_S2_D_Legend_Diameter.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (8, 1))
axLabel = plt.subplot()
axLabel.set_xlim(0, 1)
for pos in np.arange(0, 1, 0.01):
axLabel.axvspan(pos, pos+1, color = plt.cm.jet(pos))
axLabel.yaxis.set_ticks([])
axLabel.xaxis.set_ticks_position('top')
clean_axis(axLabel)
axLabel.set_xticks([0,1])
axLabel.set_xticklabels(['min','max'], family = 'Liberation Sans',
fontsize = 30, ha = 'center', rotation = 'horizontal')
axLabel.tick_params(axis='x', which='major', pad=10)
figname = 'v1.8_S2_D_Legend_Expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#initialize figure
height = 10
width = 1
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#draw
axLabel = plt.subplot()
axLabel.matshow(np.matrix(np.arange(1.0, 0.0, -0.001)).T,
cmap = plt.cm.jet, aspect = 'auto', vmin = 0.0, vmax = 1.0)
axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')
clean_axis(axLabel)
axLabel.set_yticks([axLabel.get_ylim()[0], axLabel.get_ylim()[1]])
axLabel.set_yticklabels([0,'max'], family = 'Liberation Sans', fontsize = 30, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)
axLabel.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
axLabel.yaxis.set_label_coords(2, 0.5)
figname = 'v1.8_S3_B_Legend_Gene_expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
robustness_1st = loadData_v1(path_output, exp_id, 'robustness_1st', 'DataFrame')
robustness_1st_null = loadData_v1(path_output, exp_id, 'robustness_1st_null', 'DataFrame')
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (15, 10))
gs = plt.GridSpec(nrows = 2, ncols = 1, wspace=0.0, hspace=0.0, height_ratios=[1.0, 11.0])
#generate plot
ax = plt.subplot(gs[1])
#generate xaxis
ax.set_xlim(- 0.5, len(set(s_groups_1st)) - 0.5)
ax.set_xticks(range(len(set(s_groups_1st))))
ax.set_xticklabels(np.arange(0, len(set(s_groups_1st))) + 1, family = 'Liberation Sans', fontsize = 35)
ax.set_xlabel('Affinity propagation cluster #', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(0.5, -0.1)
ax.tick_params(axis='x', which='major', pad=10)
#generate yaxis
ax.set_ylim(0, 1)
ax.set_yticks(np.arange(0, 1.1, 0.1))
ax.set_yticklabels([0.0,'',0.2,'',0.4,'',0.6,'',0.8,'',1.0], family = 'Liberation Sans', fontsize = 35)
ax.set_ylabel('Fraction of cell retained', family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(-0.075, 0.5)
ax.tick_params(axis='y', which='major', pad=10)
#plot data
for ix, gr in enumerate(return_unique(s_groups_1st)):
ax.scatter([ix - (np.random.random() * 0.4 - 0.2) for pos in robustness_1st[str(gr)]],
robustness_1st[str(gr)],
s = 30,
color = 'blue')
ax.plot([ix - 0.25, ix + 0.25],
[np.mean(robustness_1st[str(gr)])] * 2,
linewidth = 5,
color = 'black')
ax.scatter([ix - (np.random.random() * 0.4 - 0.2) for pos in robustness_1st[str(gr)]],
robustness_1st_null[str(gr)],
s = 30,
color = 'red')
ax.plot([ix - 0.25, ix + 0.25],
[np.mean(robustness_1st_null[str(gr)])] * 2,
linewidth = 5,
color = 'black')
#plot color symbols
ax_Sym = plt.subplot(gs[0])
ax_Sym.set_xlim(- 0.5, len(set(s_groups_1st)) - 0.5)
ax_Sym.set_ylim(-0.5, 0.5)
for ix, gr in enumerate(return_unique(s_groups_1st)):
ax_Sym.scatter(ix,
0.0,
marker = 's',
s = 1000,
color = cmap_1st[gr])
clean_axis(ax_Sym)
figname = 'v1.8_S2_E_Robustness.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
cells_BS = pd.read_csv('%s/cells_ordercluster_backspin_lev5_5000_oct28_2014.txt' % path_output,
sep='\t', index_col = None, header = None, squeeze = True)
cells_BS1 = pd.Series(cells_BS.ix[1].values, index = cells_BS.ix[0])
cells_BS2 = pd.Series(cells_BS.ix[2].values, index = cells_BS.ix[0])
#generate matrix
AP_BS_1 = pd.DataFrame(columns = set(s_groups_1st), index = set(cells_BS.ix[1])).fillna(0)
for gr in set(s_groups_1st):
ix_sel = s_groups_1st[s_groups_1st == gr].index
counter_tmp = Counter(cells_BS1[ix_sel])
for key in [key for key in counter_tmp.keys() if type(key) == str]:
AP_BS_1.ix[key, gr] = float(counter_tmp[key]) / len(ix_sel)
#order groups
AP_BS_1 = AP_BS_1.ix[['4','1','2','8','3','5','6','7'], return_unique(s_groups_1st)]
data = AP_BS_1
#generate figure
fig = plt.figure(facecolor = 'w', figsize = (15, 10))
gs = plt.GridSpec(nrows = 2, ncols = 1, wspace=0.0, hspace=0.0, height_ratios=[1.0, 11.0])
#generate plot
ax = plt.subplot(gs[1])
#generate xaxis
ax.set_xlim(-0.5, len(data.columns) - 0.5)
ax.set_xticks(np.arange(0, len(data.columns)))
ax.set_xticklabels(np.arange(0, len(data.columns)) + 1, family = 'Liberation Sans', fontsize = 35)
ax.set_xlabel('Affinity propagation cluster #', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(0.5, -0.1)
ax.tick_params(axis='x', which='major', pad=10)
#generate yaxis
ax.set_ylim(len(data.index) -0.5, -0.5)
ax.set_yticks(np.arange(0, len(data.index)))
ax.set_yticklabels(np.arange(0, len(data.index)) + 1, family = 'Liberation Sans', fontsize = 35)
ax.set_ylabel('BackSPIN cluster #', family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(-0.05, 0.5)
ax.tick_params(axis='y', which='major', pad=10)
#plot data
ax.imshow(AP_BS_1,
cmap = plt.cm.Blues,
aspect = 'auto',
interpolation = 'nearest',
vmin = 0, vmax = 1)
#plot color symbols
ax_Sym = plt.subplot(gs[0])
ax_Sym.set_xlim(- 0.5, len(set(s_groups_1st)) - 0.5)
ax_Sym.set_ylim(-0.5, 0.5)
for ix, gr in enumerate(return_unique(s_groups_1st)):
ax_Sym.scatter(ix,
0.0,
marker = 's',
s = 1000,
color = cmap_1st[gr])
clean_axis(ax_Sym)
figname = 'v1.8_S2_F_AP_vs_BackSPIN.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#initialize figure
height = 5
width = 1
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#draw
axLabel = plt.subplot()
axLabel.matshow(np.matrix(np.arange(100, 0, -10)).T,
cmap = plt.cm.Blues, aspect = 'auto', vmin = 0.0, vmax = 100)
axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')
clean_axis(axLabel)
axLabel.set_yticks([axLabel.get_ylim()[0], axLabel.get_ylim()[1]])
axLabel.set_yticklabels(['0% ','100% '], family = 'Liberation Sans', fontsize = 35, va = 'center')
plt.rcParams['ytick.major.pad'] = 10
axLabel.set_ylabel('Overlap', family = 'Liberation Sans', fontsize = 40)
axLabel.yaxis.set_label_coords(2, 0.5)
figname = 'v1.8_S2_F_Legend.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
tsne_1st = loadData_v1(path_output, exp_id, 'tsne_1st', 'DataFrame')
tsne = tsne_1st
#initialize figure
height = 17
width = 17
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#define x- and y-limits
x_min, x_max = np.min(tsne['x']), np.max(tsne['x'])
y_min, y_max = np.min(tsne['y']), np.max(tsne['y'])
x_diff, y_diff = x_max - x_min, y_max - y_min
pad = 2.0
if x_diff > y_diff:
xlim = (x_min - pad, x_max + pad)
ylim = (y_min * (x_diff / y_diff) - pad, y_max * (x_diff / y_diff) + pad)
if x_diff < y_diff:
xlim = (x_min * (y_diff/x_diff) - pad, x_max * (y_diff/x_diff) + pad)
ylim = (y_min - pad, y_max + pad)
text_pad = 2
#draw groups
ax1 = plt.subplot()
ax1.set_xlim(xlim[0], xlim[1])
ax1.set_ylim(ylim[0], ylim[1])
remove_ticks(ax1)
clist_tsne = [cmap_1st[s_groups_1st[ix]] for ix in tsne.index]
ax1.scatter(tsne['x'],
tsne['y'],
s = 200,
linewidth = 0.0,
c = clist_tsne,
cmap = plt.cm.jet,
edgecolor=clist_tsne)
clean_axis(ax1)
figname = 'v1.8_1_C_t-SNE_1st.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#initialize figure
width = 18
height = 16
fig = plt.figure(facecolor = 'w', figsize = (width, height))
gs = GridSpec(1,1)
ax0 = fig.add_subplot(gs[0])
ax0.set_ylim(12.5, -0.5)
ax0.set_xlim(0, 1)
group_list = return_unique(s_groups_1st)
x = 0.03
for pos in range(len(group_list)):
ax0.scatter(x, pos-0.05, c = cmap_1st[group_list[pos]], cmap = plt.cm.jet, marker = 's', vmin = 0, vmax = 1.0, s = 1000,
linewidth = 0.0)
ax0.text(x + 0.03, pos, nmap_1st[group_list[pos]], family = 'Liberation Sans', fontsize = 40, va = 'center', ha = 'left')
ax0.text(x + 0.65, pos, gmap_1st[group_list[pos]], fontsize = 30, family = 'Liberation Sans', va = 'center', ha = 'left',
fontstyle = 'italic')
remove_ticks(ax0)
#ax0.axvline(ax0.get_xlim()[0], color = 'black', linewidth = 2)
#ax0.axvline(ax0.get_xlim()[1], color = 'black', linewidth = 2)
#ax0.axhline(ax0.get_ylim()[0], color = 'black', linewidth = 2)
#ax0.axhline(ax0.get_ylim()[1], color = 'black', linewidth = 2)
clean_axis(ax0)
figname = 'v1.8_S2_Legend_cell_groups.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
seq = loadData_v1(path_input, exp_id, 'seq', 'DataFrame')
#use all genes with mean expression > 0.05
seq_sel_ = dropNull(seq[s_groups_1st.index], path_input, cutoff_mean=0.10)
seq_sel_ = log2Transform(seq_sel_)
#calculate group averages
group_avg = pd.DataFrame(index = seq_sel_.index, columns = set(s_groups_1st))
for group in set(s_groups_1st):
group_avg_tmp = seq_sel_[s_groups_1st[s_groups_1st == group].index].mean(axis=1)
group_avg[group] = group_avg_tmp
#create group distance matrix (euclidean distance for Ward linkage)
group_avg_dist = squareform(pdist(group_avg.T, 'euclidean'))
#create single-linkage clustering
seq_cluster = ward(group_avg_dist)
#create figure and GridSpec
height = 20
width = 12
fig = plt.figure(facecolor = 'w', figsize = (width, height))
heatmapGS = GridSpec(1,2,wspace=0.0,hspace=0.0,width_ratios=[8,4])
#create dendogramm
ax_dend = fig.add_subplot(heatmapGS[0])
den = sch.dendrogram(seq_cluster,color_threshold=np.inf, orientation='right')
leaves = den['leaves']
lines = ax_dend.get_children()[2]
lines.set_color('black')
lines.set_linewidth(3)
clean_axis(ax_dend)
#create labels
ax_label = fig.add_subplot(heatmapGS[1])
ax_label.set_ylim(bottom = 0, top = len(leaves))
ax_label.set_xlim(left = 0, right = 10)
ax_label.scatter(x = [2 for element in leaves],
y = [val + 0.5 for val in range(len(leaves))],
c = [cmap_1st[group_avg.columns[val]] for val in leaves],
vmin = 0,
vmax = 1.0,
linewidth = 0,
s = 1000,
marker = 's',
edgecolor=[cmap_1st[group_avg.columns[val]] for val in leaves])
clean_axis(ax_label)
for pos in range(len(leaves)):
ax_label.text(x = 4, y = pos + 0.5, s = nmap_1st_short[group_avg.columns[leaves[pos]]],
family = 'Liberation Sans', size = 40, va = 'center')
figname = 'v1.8_1_E_1st_Wards_Linkage.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
cmap = plt.cm.seismic
dataset = seq_sel.apply(lambda x: x / np.max(x), axis = 1)
tsne = tsne_1st
#initialize figure
height = 20
width = 20
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#define x- and y-limits
x_min, x_max = np.min(tsne['x']), np.max(tsne['x'])
y_min, y_max = np.min(tsne['y']), np.max(tsne['y'])
x_diff, y_diff = x_max - x_min, y_max - y_min
pad = 2.0
if x_diff > y_diff:
xlim = (x_min - pad, x_max + pad)
ylim = (y_min * (x_diff / y_diff) - pad, y_max * (x_diff / y_diff) + pad)
if x_diff < y_diff:
xlim = (x_min * (y_diff/x_diff) - pad, x_max * (y_diff/x_diff) + pad)
ylim = (y_min - pad, y_max + pad)
text_pad = 2
#draw Krt14
gene = 'Krt14'
ax0 = plt.subplot2grid((3,3), (0,0))
ax0.set_xlim(xlim[0], xlim[1])
ax0.set_ylim(ylim[0], ylim[1])
remove_ticks(ax0)
ax0.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax0.text(ax0.get_xlim()[0] + text_pad,
ax0.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax0)
#draw Krt79
gene = 'Krt79'
ax1 = plt.subplot2grid((3,3), (1,0))
ax1.set_xlim(xlim[0], xlim[1])
ax1.set_ylim(ylim[0], ylim[1])
remove_ticks(ax1)
ax1.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax1.text(ax1.get_xlim()[0] + text_pad,
ax1.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax1)
#draw Krt6a
gene = 'Krt6a'
ax2 = plt.subplot2grid((3,3), (2,0))
ax2.set_xlim(xlim[0], xlim[1])
ax2.set_ylim(ylim[0], ylim[1])
remove_ticks(ax2)
ax2.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax2.text(ax2.get_xlim()[0] + text_pad,
ax2.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax2)
#draw Krt10
gene = 'Krt10'
ax3 = plt.subplot2grid((3,3), (0,1))
ax3.set_xlim(xlim[0], xlim[1])
ax3.set_ylim(ylim[0], ylim[1])
remove_ticks(ax3)
ax3.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax3.text(ax3.get_xlim()[0] + text_pad,
ax3.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax3)
#draw Mgst1
gene = 'Mgst1'
ax4 = plt.subplot2grid((3,3), (1,1))
ax4.set_xlim(xlim[0], xlim[1])
ax4.set_ylim(ylim[0], ylim[1])
remove_ticks(ax4)
ax4.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax4.text(ax4.get_xlim()[0] + text_pad,
ax4.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax4)
#draw Cd3g
gene = 'Cd3g'
ax5 = plt.subplot2grid((3,3), (2,1))
ax5.set_xlim(xlim[0], xlim[1])
ax5.set_ylim(ylim[0], ylim[1])
remove_ticks(ax5)
ax5.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax5.text(ax5.get_xlim()[0] + text_pad,
ax5.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax5)
#draw Lor
gene = 'Lor'
ax6 = plt.subplot2grid((3,3), (0,2))
ax6.set_xlim(xlim[0], xlim[1])
ax6.set_ylim(ylim[0], ylim[1])
remove_ticks(ax6)
ax6.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax6.text(ax6.get_xlim()[0] + text_pad,
ax6.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax6)
#draw Postn
gene = 'Postn'
ax7 = plt.subplot2grid((3,3), (1,2))
ax7.set_xlim(xlim[0], xlim[1])
ax7.set_ylim(ylim[0], ylim[1])
remove_ticks(ax7)
ax7.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax7.text(ax7.get_xlim()[0] + text_pad,
ax7.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax7)
plt.subplots_adjust(wspace = 0.05, hspace = 0.05)
#draw Cd207
gene = 'Cd207'
ax8 = plt.subplot2grid((3,3), (2,2))
ax8.set_xlim(xlim[0], xlim[1])
ax8.set_ylim(ylim[0], ylim[1])
remove_ticks(ax8)
ax8.scatter(tsne['x'],
tsne['y'],
s = 50,
linewidth = 0.0,
c = dataset.ix[gene, tsne.index],
edgecolor = [cmap(val) for val in dataset.ix[gene, tsne.index]],
cmap = cmap)
ax8.text(ax8.get_xlim()[0] + text_pad,
ax8.get_ylim()[1] + text_pad,
gene,
family = 'Liberation Sans', fontsize = 35, va = 'top', weight = 'bold')
clean_axis(ax8)
plt.subplots_adjust(wspace = 0.05, hspace = 0.05)
figname = 'v1.8_1_D_t-SNE_1st_markers.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
#initialize figure
height = 10
width = 1
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#draw
axLabel = plt.subplot()
axLabel.matshow(np.matrix(np.arange(1.0, 0.0, -0.001)).T,
cmap = plt.cm.seismic, aspect = 'auto', vmin = 0.0, vmax = 1.0)
axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')
clean_axis(axLabel)
axLabel.set_yticks([axLabel.get_ylim()[0], axLabel.get_ylim()[1]])
axLabel.set_yticklabels(['0','max'], family = 'Liberation Sans', fontsize = 30, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)
axLabel.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
axLabel.yaxis.set_label_coords(2, 0.5)
"""
figname = 'v1.7_1_D1_Legend_expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
"""
#initialize figure
height = 1
width = 10
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#draw
axLabel = plt.subplot()
axLabel.matshow(np.matrix(np.arange(0.0, 1.0, 0.001)),
cmap = plt.cm.seismic, aspect = 'auto', vmin = 0.0, vmax = 1.0)
axLabel.yaxis.set_ticks([])
axLabel.xaxis.set_ticks_position('bottom')
clean_axis(axLabel)
axLabel.set_xticks([axLabel.get_xlim()[0], axLabel.get_xlim()[1]])
axLabel.set_xticklabels(['0','max'], family = 'Liberation Sans', fontsize = 35, va = 'center')
axLabel.tick_params(axis='x', which='major', pad=25)
axLabel.set_xlabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
axLabel.xaxis.set_label_coords(0.5, -0.4)
figname = 'v1.8_1_D_Legend_expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
NBR_1st_traces = loadData_from_pickle_v1(path_output, exp_id, 'NBR_1st_traces')
NBR_1st_summary = loadData_from_pickle_v1(path_output, exp_id,'NBR_1st_summary')
NBR_1st_bin_bl = loadData_from_pickle_v1(path_output,exp_id,'NBR_1st_bin_bl')
NBR_1st_size_bl = loadData_from_pickle_v1(path_output,exp_id,'NBR_1st_size_bl')
NBR_1st_bin_gr = loadData_from_pickle_v1(path_output,exp_id,'NBR_1st_bin_gr')
NBR_1st_size_gr = loadData_from_pickle_v1(path_output,exp_id,'NBR_1st_size_gr')
genes_baseline = neg_binom_extract_genes(NBR_1st_size_bl, score = 'percentile - 25', number = 10)
#define input variables
gr_order = return_unique(s_groups_1st)
genes = genes_baseline
L = len(genes.index)
traces = NBR_1st_traces
cmap = cmap_1st
#initialize figure
height = 4.8 * len(gr_order)
width = 8
plt.figure(facecolor = 'w', figsize = (width, height))
gs = plt.GridSpec(len(gr_order), 2, hspace = 0.15, wspace = 0.0, width_ratios = [5,8])
#iterate through groups
for pos, gr in enumerate(gr_order):
genes_tmp = [gene for gene in genes[str(gr)].values if gene != None]
#create gene name axis
ax0 = plt.subplot(gs[pos,0])
ax0.set_ylim(L - 0.5, -0.5)
clean_axis(ax0)
#create violin plot axis
ax1 = plt.subplot(gs[pos,1])
ax1.spines['right'].set_color('none')
ax1.set_ylim(L - 0.5, -0.5)
ax1.yaxis.set_ticks_position('left')
ax1.yaxis.set_ticks([])
#iterate over genes
for ix, g in enumerate(genes_tmp):
ax0.text(0.975, ix, g, family = 'Liberation Sans', fontsize = 30, ha = 'right', va = 'center')
#print background
if ix % 2 == 0:
ax1.axhspan(ix - 0.5, ix + 0.5, color = '#FFFFFF', zorder = 0)
if ix % 2 == 1:
ax1.axhspan(ix - 0.5, ix + 0.5, color = '#E6E7E8', zorder = 0)
#print group traces
violin = ax1.violinplot([x - 1 for x in traces.ix[g, str(gr)]],
positions = [ix],
vert=False,
showextrema=False)
for v in violin['bodies']:
v.set_edgecolor(cmap[gr])
v.set_facecolor(cmap[gr])
v.set_linewidth(1.0)
v.set_alpha(1.0)
#print baseline traces
violin = ax1.violinplot([x - 1 for x in traces.ix[g, 'Baseline']],
positions = [ix],
vert=False,
showextrema=False)
for v in violin['bodies']:
v.set_edgecolor('#808284')
v.set_facecolor('#808284')
v.set_linewidth(1.0)
v.set_alpha(1.0)
for ix, t in enumerate(ax1.xaxis.get_ticklabels()):
if ix % 2 == 0:
t.set_family('Liberation Sans')
t.set_fontsize(25)
else:
t.set_fontsize(0)
figname = 'v1.8_S2_G_Markers_vs_Baseline.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
genes_groups = neg_binom_extract_genes(NBR_1st_size_gr, score = 'percentile - 25', number = 10)
#define input variables
gr_order = return_unique(s_groups_1st)
genes = genes_groups
L = len(genes.index)
traces = NBR_1st_traces
summary = NBR_1st_summary
cmap = cmap_1st
#initialize figure
height = 4.8 * len(gr_order)
width = 8
plt.figure(facecolor = 'w', figsize = (width, height))
gs = plt.GridSpec(len(gr_order), 2, hspace = 0.15, wspace = 0.0, width_ratios = [5,8])
#iterate through groups
for pos, gr in enumerate(gr_order):
genes_tmp = [gene for gene in genes[str(gr)].values if pd.isnull(gene) == False]
#create gene name axis
ax0 = plt.subplot(gs[pos,0])
ax0.set_ylim(L - 0.5, -0.5)
clean_axis(ax0)
#create violin plot axis
ax1 = plt.subplot(gs[pos,1])
ax1.spines['right'].set_color('none')
ax1.set_ylim(L - 0.5, -0.5)
ax1.yaxis.set_ticks_position('left')
ax1.yaxis.set_ticks([])
if len(genes_tmp) == 0:
remove_ticks(ax1)
#iterate over genes
for ix, g in enumerate(genes_tmp):
ax0.text(0.975, ix, g, family = 'Liberation Sans', fontsize = 30, ha = 'right', va = 'center')
#print background
if ix % 2 == 0:
ax1.axhspan(ix - 0.5, ix + 0.5, color = '#FFFFFF', zorder = 0)
if ix % 2 == 1:
ax1.axhspan(ix - 0.5, ix + 0.5, color = '#E6E7E8', zorder = 0)
#print group traces
violin = ax1.violinplot([x - 1 for x in traces.ix[g, str(gr)]],
positions = [ix],
vert=False,
showextrema=False)
for v in violin['bodies']:
v.set_edgecolor(cmap[gr])
v.set_facecolor(cmap[gr])
v.set_linewidth(1.0)
v.set_alpha(1.0)
#print 2nd group traces
gr_2nd = summary.ix[g].ix['mean'].order().index[-2]
violin = ax1.violinplot([x - 1 for x in traces.ix[g, gr_2nd]],
positions = [ix],
vert=False,
showextrema=False)
for v in violin['bodies']:
v.set_edgecolor('#808284')
v.set_facecolor('#808284')
v.set_linewidth(1.0)
v.set_alpha(1.0)
for ix, t in enumerate(ax1.xaxis.get_ticklabels()):
if ix % 2 == 0:
t.set_family('Liberation Sans')
t.set_fontsize(25)
else:
t.set_fontsize(0)
figname = 'v1.8_S2_H_Markers_vs_Groups.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
genes = ['Krt14','Ccl27a_loc2','Mt2','Mt4',
'Krt10','Ptgs1','Dsp','Krt77',
'Lor','Flg2','Elovl1','Lce1m',
'Krt79','Krt17','Defb6','Cst6',
'Mgst1','Scd1','Elovl6','Fabp5',
'Postn','Cxcl14','Cd34','Sparc',
'Krt6a','Krt75','Timp3','Wfdc3',
'Cd3g','Ctla2a','Xcl1','Nkg7',
'Cd207','H2-Aa','H2-Eb1','Cd74']
cell_groups = s_groups_1st
dataset = seq[s_groups_1st.index]
cmap = cmap_1st
#create figure
bar_size = 0.5
bar_pad = 0.5
width = 17.5
height = len(genes) * 1.2 #+ (bar_size + bar_pad)
fig = plt.figure(facecolor = 'w', figsize = (width, height))
#create GS for gene barplots
gs1 = plt.GridSpec(len(genes), 2, hspace = 0.00, wspace = 0.0, top = 1 - ((bar_pad + bar_size) / height), right = 1,
left = (bar_pad + bar_size) / width, bottom = 0, width_ratios = [14,4])
#create GS for sample group bar
gs0 = plt.GridSpec(1,2, left = (bar_pad + bar_size) / width, right = 1, top = 1,
bottom = 1 - ((bar_size) / height), hspace = 0.0, wspace = 0.0, width_ratios = [14,4])
#colorlist bar
clistBAR = [cmap[val] for val in cell_groups]
#draw genes
for g in genes:
#create group_specific mean expressions
mean_tmp = pd.Series(index = set(cell_groups))
for gr in set(cell_groups):
ix_tmp = cell_groups[cell_groups == gr].index
mean_tmp[gr] = dataset.ix[g, ix_tmp].mean()
#create white space for gene names
ax0 = plt.subplot(gs1[genes.index(g),1])
clean_axis(ax0)
#create axis
ax0 = plt.subplot(gs1[genes.index(g),0])
ax0.axvspan(0, len(dataset.columns), color = '#FFFFFF', zorder = 0)
#define xaxis
ax0.set_xlim(left = 0, right = (len(dataset.columns)))
ax0.xaxis.set_ticklabels([])
#define yaxis
ax0.set_ylim(bottom = 0, top = np.ceil(np.max(mean_tmp) * 2))
ax0.set_ylabel(g, family = 'Liberation Sans', fontsize = 40, rotation = 'horizontal', va = 'center', ha = 'left')
ax0.yaxis.set_label_coords(1.02, 0.5)
for tick_pos in range(len(ax0.yaxis.get_major_ticks())):
if tick_pos == 0 or tick_pos == len(ax0.yaxis.get_major_ticks()) / 2:
ax0.yaxis.get_major_ticks()[tick_pos].label.set_family('Liberation Sans')
ax0.yaxis.get_major_ticks()[tick_pos].label.set_fontsize(35)
else:
ax0.yaxis.get_major_ticks()[tick_pos].label.set_visible(False)
#draw expression data
ax0.bar(np.arange(0, len(dataset.columns),1), dataset.ix[g], color = clistBAR, linewidth = 0, width = 1.0)
#draw means
ax0.step(range(len(cell_groups)), [mean_tmp[val] for val in cell_groups], where = 'post', color = 'black', linewidth = 2)
ax1 = plt.subplot(gs0[0,0])
ax1.set_xlim(0, len(cell_groups))
for s_gr in enumerate(cell_groups):
ax1.axvspan(xmin = s_gr[0], xmax = s_gr[0] + 1, color = cmap[s_gr[1]])
remove_ticks(ax1, linewidth = 1)
clean_axis(ax1)
figname = 'v1.8_S2_I_Barplot.pdf'
plt.savefig('%s/%s' % (path_figures, figname),
format = 'pdf',
transparent = True,
bbox_inches = 'tight',
pad_inches = 0,
rasterized = True)
seq_ = dropNull(seq, path_input, cutoff_mean = 0.00)
m_ix1, m_ix2 = [],[]
for gr in return_unique(s_groups_1st):
m_ix1 += [nmap_1st_short[gr]] * 2
m_ix2 += ['vs Baseline','vs other groups']
NBR_1st_excel = pd.DataFrame(index = seq_.index, columns = [np.array(m_ix1), np.array(m_ix2)])
ix_sel = NBR_1st_size_bl.index.levels[0]
for gr in return_unique(s_groups_1st):
NBR_1st_excel[nmap_1st_short[gr], 'vs Baseline'].ix[ix_sel] = NBR_1st_size_bl.swaplevel(0,1).ix['percentile - 25'].ix[ix_sel][str(gr)]
NBR_1st_excel[nmap_1st_short[gr], 'vs other groups'].ix[ix_sel] = NBR_1st_size_gr.swaplevel(0,1).ix['percentile - 25'].ix[ix_sel][str(gr)]
NBR_1st_excel[NBR_1st_excel=='n.s'] = 'n.s.'
NBR_1st_excel = NBR_1st_excel.fillna('b.c.')
#saveData_v1(NBR_1st_excel, path_output, exp_id, 'NBR_1st_excel')